1 Introduction

The objective of this notebook is to exclude the clusters of doublets or poor-quality cells.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(tidyverse)

2.2 Parameters

# Paths
path_to_doublets <- here::here("scRNA-seq/3-clustering/2-level_2/tmp/doublets_multiome_df_all.rds")

path_to_dir <- here::here("scATAC-seq/results/R_objects/level_2/")
path_to_obj <- str_c(
  path_to_dir,
  params$cell_type,
  "/",
  params$cell_type,
  "_integrated_level_2.rds",
  sep = ""
)
path_to_save <- str_c(
  path_to_dir,
  params$cell_type,
  "/",
  params$cell_type,
  "_clustered_filtered_level_2.rds",
  sep = ""
)


# Functions
source(here::here("scRNA-seq/bin/utils.R"))


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "chocolate1", "coral2", "blueviolet",
                   "brown1", "darkmagenta", "deepskyblue1", "dimgray",
                   "deeppink1", "green", "lightgray", "hotpink1",
                   "indianred4", "khaki", "mediumorchid2")

# Resolutions
resolutions <- c(
  NBC_MBC = 0.025,
  GCBC = 0.05,
  CD4_T = 0.025,
  Cytotoxic = 0.01,
  PC = 0.01
)
resolution <- resolutions[params$cell_type]
names(resolution) <- NULL


# Clusters to exclude
cluster_to_exclude_all <- list(
  NBC_MBC = c("1"),
  GCBC = c("1"),
  CD4_T = c(as.character(seq(1,3))),
  Cytotoxic = c(as.character(seq(1,3))),
  PC = c(as.character(seq(1,8)))
)
cluster_to_exclude <- cluster_to_exclude_all[[params$cell_type]]

2.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 270784 features across 27497 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 267464 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony

3 Cluster

Here, we cluster with the resolution that allows us to separate our clusters of interest, which was decided by inspecting the results of previous notebooks.

seurat <- FindClusters(seurat, resolution = resolution)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 27497
## Number of edges: 810353
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9560
## Number of communities: 5
## Elapsed time: 4 seconds
p <- DimPlot(seurat, cols = color_palette,label = T)
p

4 Exclude doublets and poor-quality cells

4.1 Exclude doublets directly from scRNAseq data using the Multiome approach

4.1.1 Visualize before filtering

multiome_doublets <- readRDS(path_to_doublets)
doublets_cells <- colnames(seurat)[which(colnames(seurat) %in% multiome_doublets$barcode)]
length(doublets_cells)
## [1] 1571
DimPlot(
  seurat, reduction = "umap",
  cols.highlight = "darkred", cols= "grey",
  cells.highlight= doublets_cells,
  pt.size = 0.1
)

4.1.2 Visualize after filtering

selected_cells_doublets <- colnames(seurat)[which(!colnames(seurat) %in% doublets_cells)]
seurat <- subset(seurat, cells = selected_cells_doublets)
seurat
## An object of class Seurat 
## 270784 features across 25926 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 267464 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(
  seurat, reduction = "umap",
  pt.size = 0.1
)

4.2 Exclude problematic clusters

print("Clusters to exclude")
## [1] "Clusters to exclude"
cluster_to_exclude
## [1] "1"
selected_cells <- colnames(seurat)[!(seurat$seurat_clusters %in% cluster_to_exclude)]
length(selected_cells)
## [1] 23893
seurat <- subset(seurat, cells = selected_cells)
seurat
## An object of class Seurat 
## 270784 features across 23893 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 267464 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony

4.2.1 Visualize after filtering

p2 <- DimPlot(seurat, cols = color_palette, label = T)
p2

5 Save

saveRDS(seurat, path_to_save)

6 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] es_ES.UTF-8/UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Signac_1.1.0.9000 forcats_0.5.0     stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4       readr_1.4.0       tidyr_1.1.2       tibble_3.0.4      ggplot2_3.3.2     tidyverse_1.3.0   Seurat_3.9.9.9010 BiocStyle_2.16.1 
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1              SnowballC_0.7.0             rtracklayer_1.48.0          GGally_2.0.0                bit64_4.0.5                 knitr_1.30                  irlba_2.3.3                 DelayedArray_0.14.0         data.table_1.13.2           rpart_4.1-15                RCurl_1.98-1.2              AnnotationFilter_1.12.0     generics_0.1.0              BiocGenerics_0.34.0         GenomicFeatures_1.40.1      cowplot_1.1.0               RSQLite_2.2.1               RANN_2.6.1                  future_1.20.1               bit_4.0.4                   spatstat.data_2.1-0         xml2_1.3.2                  lubridate_1.7.9             httpuv_1.5.4                SummarizedExperiment_1.18.1 assertthat_0.2.1            xfun_0.18                   hms_0.5.3                   evaluate_0.14               promises_1.1.1              fansi_0.4.1                 progress_1.2.2              dbplyr_1.4.4                readxl_1.3.1                igraph_1.2.6                DBI_1.1.0                   htmlwidgets_1.5.2           reshape_0.8.8               stats4_4.0.3                ellipsis_0.3.1              backports_1.2.0             bookdown_0.21              
##  [43] biomaRt_2.44.4              deldir_0.2-3                vctrs_0.3.4                 Biobase_2.48.0              here_1.0.1                  ensembldb_2.12.1            ROCR_1.0-11                 abind_1.4-5                 withr_2.3.0                 ggforce_0.3.2               BSgenome_1.56.0             checkmate_2.0.0             sctransform_0.3.1           GenomicAlignments_1.24.0    prettyunits_1.1.1           goftest_1.2-2               cluster_2.1.0               lazyeval_0.2.2              crayon_1.3.4                pkgconfig_2.0.3             labeling_0.4.2              tweenr_1.0.1                GenomeInfoDb_1.24.0         nlme_3.1-150                ProtGenerics_1.20.0         nnet_7.3-14                 rlang_0.4.11                globals_0.13.1              lifecycle_0.2.0             miniUI_0.1.1.1              BiocFileCache_1.12.1        modelr_0.1.8                rsvd_1.0.3                  dichromat_2.0-0             cellranger_1.1.0            rprojroot_2.0.2             polyclip_1.10-0             matrixStats_0.57.0          lmtest_0.9-38               graph_1.66.0                Matrix_1.3-2                ggseqlogo_0.1              
##  [85] zoo_1.8-8                   reprex_0.3.0                base64enc_0.1-3             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                KernSmooth_2.23-17          Biostrings_2.56.0           blob_1.2.1                  parallelly_1.21.0           jpeg_0.1-8.1                S4Vectors_0.26.0            scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  ica_1.0-2                   zlibbioc_1.34.0             compiler_4.0.3              RColorBrewer_1.1-2          fitdistrplus_1.1-1          Rsamtools_2.4.0             cli_2.1.0                   XVector_0.28.0              listenv_0.8.0               patchwork_1.1.0             pbapply_1.4-3               ps_1.4.0                    htmlTable_2.1.0             Formula_1.2-4               MASS_7.3-53                 mgcv_1.8-33                 tidyselect_1.1.0            stringi_1.5.3               yaml_2.2.1                  askpass_1.1                 latticeExtra_0.6-29         ggrepel_0.8.2               grid_4.0.3                  VariantAnnotation_1.34.0    fastmatch_1.1-0            
## [127] tools_4.0.3                 future.apply_1.6.0          parallel_4.0.3              rstudioapi_0.12             foreign_0.8-80              lsa_0.73.2                  gridExtra_2.3               farver_2.0.3                Rtsne_0.15                  digest_0.6.27               BiocManager_1.30.10         shiny_1.5.0                 Rcpp_1.0.5                  GenomicRanges_1.40.0        broom_0.7.2                 later_1.1.0.1               RcppAnnoy_0.0.16            OrganismDbi_1.30.0          httr_1.4.2                  AnnotationDbi_1.50.3        ggbio_1.36.0                biovizBase_1.36.0           colorspace_2.0-0            rvest_0.3.6                 XML_3.99-0.3                fs_1.5.0                    tensor_1.5                  reticulate_1.18             IRanges_2.22.1              splines_4.0.3               uwot_0.1.8.9001             RBGL_1.64.0                 RcppRoll_0.3.0              spatstat.utils_2.1-0        plotly_4.9.2.1              xtable_1.8-4                jsonlite_1.7.1              spatstat_1.64-1             R6_2.5.0                    Hmisc_4.4-1                 pillar_1.4.6                htmltools_0.5.1.1          
## [169] mime_0.9                    glue_1.4.2                  fastmap_1.0.1               BiocParallel_1.22.0         codetools_0.2-17            lattice_0.20-41             curl_4.3                    leiden_0.3.5                openssl_1.4.3               survival_3.2-7              rmarkdown_2.5               munsell_0.5.0               GenomeInfoDbData_1.2.3      haven_2.3.1                 reshape2_1.4.4              gtable_0.3.0